
ssm_filtered <- ssm |> 
  dplyr::filter(Age >= 50)

ssm_filtered <- ssm_filtered |> 
  dplyr::filter(ChildNum >= 1 & Child1_25 == 1)

ssm_filtered <- ssm_filtered |> 
  dplyr::select(SubjHealth, ParentHighEdu, ChildCollege_Max, Sex, BirthCohort, Education, Place_15, Jsei_Fj, EmpStatus_Fj, Unemp, MarStatus, PastHealthIssue)

data_imp <- ssm_filtered |> 
  dplyr::mutate(across(.cols = c(ParentHighEdu, Sex, Education, Place_15, EmpStatus_Fj, Unemp, MarStatus, PastHealthIssue), 
                       .fns =  ~ as.integer((.x)))) |> 
  as.data.frame() |> 
  amelia(m = 5, noms=c("ParentHighEdu"))



# Overall --------------------------------------------------------------

# assignment_overall <- ssm_filtered |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

## Imputation ----

### IMP: 1 ----

mi_imp <- data_imp$imputations[[1]]

assignment_overall <- mi_imp |> dplyr::filter(ParentHighEdu == "2") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall_mi <- mi_imp |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result <- estimate_overall_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result <- estimate_overall_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result)

Result <- Result |> dplyr::mutate(Sex = "Overall", 
                                  Imp = "1")

close_value_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate)

close_prop_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate)

### IMP: 2 ----

mi_imp <- data_imp$imputations[[2]]

assignment_overall <- mi_imp |> dplyr::filter(ParentHighEdu == "2") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall_mi <- mi_imp |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result <- estimate_overall_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Overall", 
                Imp = "2") |> 
  dplyr::bind_rows(Result)

Result <- estimate_overall_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Overall", 
                Imp = "2") |> 
  dplyr::bind_rows(Result)

close_value_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa)

close_prop_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa)

### IMP: 3 ----

mi_imp <- data_imp$imputations[[3]]

assignment_overall <- mi_imp |> dplyr::filter(ParentHighEdu == "2") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall_mi <- mi_imp |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result <- estimate_overall_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Overall", 
                Imp = "3") |> 
  dplyr::bind_rows(Result)

Result <- estimate_overall_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Overall", 
                Imp = "3") |> 
  dplyr::bind_rows(Result)

close_value_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa)

close_prop_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa)

### IMP: 4 ----

mi_imp <- data_imp$imputations[[4]]

assignment_overall <- mi_imp |> dplyr::filter(ParentHighEdu == "2") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall_mi <- mi_imp |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result <- estimate_overall_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Overall", 
                Imp = "4") |> 
  dplyr::bind_rows(Result)

Result <- estimate_overall_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Overall", 
                Imp = "4") |> 
  dplyr::bind_rows(Result)

close_value_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa)

close_prop_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa)

### IMP: 5 ----

mi_imp <- data_imp$imputations[[5]]

assignment_overall <- mi_imp |> dplyr::filter(ParentHighEdu == "2") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall_mi <- mi_imp |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result <- estimate_overall_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Overall", 
                Imp = "5") |> 
  dplyr::bind_rows(Result)

Result <- estimate_overall_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Overall", 
                Imp = "5") |> 
  dplyr::bind_rows(Result)

close_value_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa)

close_prop_oa <- estimate_overall_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa)

## Combine Imputation ----

Result <- Result |> dplyr::summarise(across(.cols = estimate:ci.max, 
                                            .fns = ~ mean(.x)),
                                     .by = c(Sex, Estimates, ParentHighEdu))

close_value_oa <- close_value_oa |> dplyr::summarise(mean(estimate)) |> unlist()

close_prop_oa <- close_prop_oa |> dplyr::summarise(mean(estimate)) |> unlist()

# Male --------------------------------------------------------------

# assignment_overall <- ssm_filtered |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

## Imputation ----

### IMP: 1 ----

mi_imp <- data_imp$imputations[[1]]

assignment_male <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 1L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male_mi <- mi_imp |> 
  dplyr::filter(Sex == 1L) |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_m <- estimate_male_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_m <- estimate_male_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_m)

Result_m <- Result_m |> dplyr::mutate(Sex = "Male", 
                                      Imp = "1")

close_value_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate)

close_prop_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate)

### IMP: 2 ----

mi_imp <- data_imp$imputations[[2]]

assignment_male <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 1L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male_mi <- mi_imp |> 
  dplyr::filter(Sex == 1L) |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_m <- estimate_male_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Male", 
                Imp = "2") |> 
  dplyr::bind_rows(Result_m)

Result_m <- estimate_male_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Male", 
                Imp = "2") |> 
  dplyr::bind_rows(Result_m)

close_value_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_m)

close_prop_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_m)

### IMP: 3 ----

mi_imp <- data_imp$imputations[[3]]

assignment_male <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 1L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male_mi <- mi_imp |> 
  dplyr::filter(Sex == 1L) |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_m <- estimate_male_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Male", 
                Imp = "3") |> 
  dplyr::bind_rows(Result_m)

Result_m <- estimate_male_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Male", 
                Imp = "3") |> 
  dplyr::bind_rows(Result_m)

close_value_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_m)

close_prop_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_m)

### IMP: 4 ----

mi_imp <- data_imp$imputations[[4]]

assignment_male <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 1L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male_mi <- mi_imp |> 
  dplyr::filter(Sex == 1L) |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_m <- estimate_male_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Male", 
                Imp = "4") |> 
  dplyr::bind_rows(Result_m)

Result_m <- estimate_male_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Male", 
                Imp = "4") |> 
  dplyr::bind_rows(Result_m)

close_value_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_m)

close_prop_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_m)

### IMP: 5 ----

mi_imp <- data_imp$imputations[[5]]

assignment_male <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 1L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male_mi <- mi_imp |> 
  dplyr::filter(Sex == 1L) |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_m <- estimate_male_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Male", 
                Imp = "5") |> 
  dplyr::bind_rows(Result_m)

Result_m <- estimate_male_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Male", 
                Imp = "5") |> 
  dplyr::bind_rows(Result_m)

close_value_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_m)

close_prop_oa_m <- estimate_male_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_m)

## Combine Imputation ----

Result_m <- Result_m |> dplyr::summarise(across(.cols = estimate:ci.max, 
                                                .fns = ~ mean(.x)),
                                         .by = c(Sex, Estimates, ParentHighEdu))

close_value_oa_m <- close_value_oa_m |> dplyr::summarise(mean(estimate)) |> unlist()

close_prop_oa_m <- close_prop_oa_m |> dplyr::summarise(mean(estimate)) |> unlist()

# Female --------------------------------------------------------------

# assignment_overall <- ssm_filtered |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

## Imputation ----

### IMP: 1 ----

mi_imp <- data_imp$imputations[[1]]

assignment_female <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 2L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_female_mi <- mi_imp |> 
  dplyr::filter(Sex == 2L) |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_f <- estimate_female_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_f <- estimate_female_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_f)

Result_f <- Result_f |> dplyr::mutate(Sex = "Female", 
                                      Imp = "1")

close_value_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate)

close_prop_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate)

### IMP: 2 ----

mi_imp <- data_imp$imputations[[2]]

assignment_female <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 2L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_female_mi <- mi_imp |> 
  dplyr::filter(Sex == 2L) |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_f <- estimate_female_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Female", 
                Imp = "2") |> 
  dplyr::bind_rows(Result_f)

Result_f <- estimate_female_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Female", 
                Imp = "2") |> 
  dplyr::bind_rows(Result_f)

close_value_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_f)

close_prop_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_f)

### IMP: 3 ----

mi_imp <- data_imp$imputations[[3]]

assignment_female <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 2L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_female_mi <- mi_imp |> 
  dplyr::filter(Sex == 2L) |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_f <- estimate_female_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Female", 
                Imp = "3") |> 
  dplyr::bind_rows(Result_f)

Result_f <- estimate_female_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Female", 
                Imp = "3") |> 
  dplyr::bind_rows(Result_f)

close_value_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_f)

close_prop_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_f)

### IMP: 4 ----

mi_imp <- data_imp$imputations[[4]]

assignment_female <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 2L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_female_mi <- mi_imp |> 
  dplyr::filter(Sex == 2L) |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_f <- estimate_female_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Female", 
                Imp = "4") |> 
  dplyr::bind_rows(Result_f)

Result_f <- estimate_female_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Female", 
                Imp = "4") |> 
  dplyr::bind_rows(Result_f)

close_value_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_f)

close_prop_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_f)

### IMP: 5 ----

mi_imp <- data_imp$imputations[[5]]

assignment_female <- mi_imp |> dplyr::filter(ParentHighEdu == "2" & Sex == 2L) |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_female_mi <- mi_imp |> 
  dplyr::filter(Sex == 2L) |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

Result_f <- estimate_female_mi$factual_means |> 
  dplyr::mutate(Estimates = "Factural", 
                Sex = "Female", 
                Imp = "5") |> 
  dplyr::bind_rows(Result_f)

Result_f <- estimate_female_mi$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual", 
                Sex = "Female", 
                Imp = "5") |> 
  dplyr::bind_rows(Result_f)

close_value_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "additive") |> select(estimate) |> dplyr::bind_rows(close_value_oa_f)

close_prop_oa_f <- estimate_female_mi$change_disparities |> dplyr::filter(ParentHighEdu == "2 - 1" & change_type == "proportional") |> select(estimate) |> dplyr::bind_rows(close_prop_oa_f)

## Combine Imputation ----

Result_f <- Result_f |> dplyr::summarise(across(.cols = estimate:ci.max, 
                                                .fns = ~ mean(.x)),
                                         .by = c(Sex, Estimates, ParentHighEdu))

close_value_oa_f <- close_value_oa_f |> dplyr::summarise(mean(estimate)) |> unlist()

close_prop_oa_f <- close_prop_oa_f |> dplyr::summarise(mean(estimate)) |> unlist()

# Integrated Results -------------------------------------------------------

Result <- Result |> dplyr::bind_rows(Result_m) |> dplyr::bind_rows(Result_f)

Result <- Result |> dplyr::mutate(close_prop = dplyr::case_when(Sex == "Overall" ~ close_prop_oa, 
                                                                Sex == "Male" ~ close_prop_m, 
                                                                Sex == "Female" ~ close_prop_f), 
                                  close_value = dplyr::case_when(Sex == "Overall" ~ close_value_oa, 
                                                                 Sex == "Male" ~ close_value_m, 
                                                                 Sex == "Female" ~ close_value_f))

Result <- Result |> 
  dplyr::filter(Estimates == "Factural") |> 
  dplyr::group_by(Sex, ParentHighEdu) |> 
  dplyr::summarise(beginning = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "ParentHighEdu"))

Result <- Result |> 
  dplyr::filter(Estimates == "Counterfactual") |> 
  dplyr::group_by(Sex, ParentHighEdu) |> 
  dplyr::summarise(end = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "ParentHighEdu"))

# Visualize the results -------------------------------------------------------

Result |> 
  dplyr::mutate(ParentHighEdu = fct_recode(factor(ParentHighEdu), "Low" = "1", "High" = "2"),
                Estimates = forcats::fct_relevel(Estimates, "Factural", "Counterfactual"), 
                Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  dplyr::rename(`Parental education` = ParentHighEdu) |> 
  dplyr::group_by(Estimates) |> 
  dplyr::ungroup() |> 
  ggplot2::ggplot(aes(x = Estimates, y = estimate, ymin = ci.min, ymax = ci.max)) + 
  ggplot2::geom_point(aes(shape = `Parental education`), position = ggplot2::position_dodge(width = .1), size = 2.5) + 
  ggplot2::geom_errorbar(aes(shape = `Parental education`), position = ggplot2::position_dodge(width = .1), width = 0.1) + 
  ggplot2::geom_segment(aes(x = c(1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05), xend = 1.5,
                            y = estimate, yend = estimate)) +
  ggplot2::geom_segment(aes(x = 1.5, xend = 1.5,
                            y = beginning, yend = end), 
                        linetype = "dashed", 
                        linewidth = .5) +
  ggplot2::geom_label(aes(x = 1.5, label = paste0("absolute: ", round(close_value, 2), "pt", "\n", "proportional: ", round((close_prop * 100), 1), "%", "\n", "gap closed"),
                          y = 3.05),
                      position = ggplot2::position_dodge(width = .1),
                      size = 5,
                      show.legend = FALSE) +
  ggplot2::facet_wrap(~ Sex) + 
  ggplot2::labs(x = "Settings", y = "Mean of Subjective Health") + 
  ggplot2::theme_bw(base_size = 18) + 
  ggplot2::scale_shape_manual(values = c(4, 1)) + 
  ggplot2::theme(legend.position = "top") + 
  ggplot2::scale_y_continuous(limits = c(3.0, 3.5))

# Table

Result_table <- Result |> 
  dplyr::select(Sex, ParentHighEdu, estimate, se, ci.min, ci.max, Estimates) |> 
  dplyr::mutate(ParentHighEdu = fct_recode(factor(ParentHighEdu), "Low" = "1", "High" = "2"),
                Estimates = forcats::fct_relevel(Estimates, "Factural", "Counterfactual"), 
                Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  dplyr::mutate(across(c(estimate, se, ci.min, ci.max), 
                       .fns = ~ round(.x, 3))) |> 
  tidyr::pivot_wider(names_from = c(ParentHighEdu), 
                     values_from = c(estimate, se, ci.min, ci.max))

write_csv(Result_table, "../submission_ssm/R-R_2024-01-05/fig/supplement/value_mi.csv")

# _____________----




# _____________----


# 10% increase of marginalized group ----

ssm_filtered <- ssm |> 
  dplyr::filter(Age >= 50)

ssm_filtered <- ssm_filtered |> 
  dplyr::filter(ChildNum >= 1 & Child1_25 == 1)

ssm_filtered <- ssm_filtered |> 
  dplyr::select(id, SubjHealth, ParentHighEdu, ChildCollege_Max, Sex, BirthCohort, Age, Education, Place_15, Jsei_Fj, EmpStatus_Fj, Unemp, MarStatus, PastHealthIssue)

ssm_filtered <- ssm_filtered |> 
  tidyr::drop_na()


## Overall --------------------------------------------------------------

assignment_overall_low <- ssm_filtered |> dplyr::filter(ParentHighEdu == "Low") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()
assignment_overall_high <- ssm_filtered |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

assignment_overall <- case_when(ssm_filtered$ParentHighEdu == "Low" ~ assignment_overall_low + 0.1,
                                ssm_filtered$ParentHighEdu == "High" ~ assignment_overall_high)

estimate_overall <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

#### Extract tibble of factural and counterfactual estimates----------------------
Result <- estimate_overall$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result <- estimate_overall$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result)

Result <- Result |> dplyr::mutate(Sex = "Overall")

close_value_oa <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_oa <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "proportional") |> select(estimate) |> unlist()

# Male --------------------------------------------------------------

ssm_male <- ssm_filtered |> dplyr::filter(Sex == "Male")

assignment_male_low <- ssm_male |> dplyr::filter(ParentHighEdu == "Low") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()
assignment_male_high <- ssm_male |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

assignment_male <- case_when(ssm_male$ParentHighEdu == "Low" ~ assignment_male_low + 0.1,
                             ssm_male$ParentHighEdu == "High" ~ assignment_male_high)

estimate_male <- ssm_male |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

#### Extract tibble of factural and counterfactual estimates----------------------
Result_m <- estimate_overall$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_m <- estimate_overall$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_m)

Result_m <- Result_m |> dplyr::mutate(Sex = "Male")

close_value_m <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_m <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "proportional") |> select(estimate) |> unlist()

# Female --------------------------------------------------------------

ssm_female <- ssm_filtered |> dplyr::filter(Sex == "Female")

assignment_female_low <- ssm_female |> dplyr::filter(ParentHighEdu == "Low") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()
assignment_female_high <- ssm_female |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

assignment_female <- case_when(ssm_female$ParentHighEdu == "Low" ~ assignment_female_low + 0.1,
                               ssm_female$ParentHighEdu == "High" ~ assignment_female_high)

estimate_overall <- ssm_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education * Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

#### Extract tibble of factural and counterfactual estimates----------------------
Result_f <- estimate_overall$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_f <- estimate_overall$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_f)

Result_f <- Result_f |> dplyr::mutate(Sex = "Female")

close_value_f <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_f <- estimate_overall$change_disparities |> dplyr::filter(ParentHighEdu == "High - Low" & change_type == "proportional") |> select(estimate) |> unlist()

# Integrated Results -------------------------------------------------------

Result <- Result |> dplyr::bind_rows(Result_m) |> dplyr::bind_rows(Result_f)

Result <- Result |> dplyr::mutate(close_prop = dplyr::case_when(Sex == "Overall" ~ close_prop_oa, 
                                                                Sex == "Male" ~ close_prop_m, 
                                                                Sex == "Female" ~ close_prop_f), 
                                  close_value = dplyr::case_when(Sex == "Overall" ~ close_value_oa, 
                                                                 Sex == "Male" ~ close_value_m, 
                                                                 Sex == "Female" ~ close_value_f))

Result <- Result |> 
  dplyr::filter(Estimates == "Factural") |> 
  dplyr::group_by(Sex, ParentHighEdu) |> 
  dplyr::summarise(beginning = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "ParentHighEdu"))

Result <- Result |> 
  dplyr::filter(Estimates == "Counterfactual") |> 
  dplyr::group_by(Sex, ParentHighEdu) |> 
  dplyr::summarise(end = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "ParentHighEdu"))

# Visualize the results -------------------------------------------------------

Result |> 
  dplyr::mutate(Estimates = forcats::fct_relevel(Estimates, "Factural", "Counterfactual"), 
                Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  dplyr::rename(`Parental education` = ParentHighEdu) |> 
  dplyr::group_by(Estimates) |> 
  dplyr::ungroup() |> 
  ggplot2::ggplot(aes(x = Estimates, y = estimate, ymin = ci.min, ymax = ci.max)) + 
  ggplot2::geom_point(aes(shape = `Parental education`), position = ggplot2::position_dodge(width = .1), size = 2.5) + 
  ggplot2::geom_errorbar(aes(shape = `Parental education`), position = ggplot2::position_dodge(width = .1), width = 0.1) + 
  ggplot2::geom_segment(aes(x = c(1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05), xend = 1.5,
                            y = estimate, yend = estimate)) +
  ggplot2::geom_segment(aes(x = 1.5, xend = 1.5,
                            y = beginning, yend = end), 
                        linetype = "dashed", 
                        linewidth = .5) +
  ggplot2::geom_label(aes(x = 1.5, label = paste0("absolute: ", round(close_value, 2), "pt", "\n", "proportional: ", round((close_prop * 100), 1), "%", "\n", "gap closed"),
                          y = 3.05),
                      position = ggplot2::position_dodge(width = .1),
                      size = 5,
                      show.legend = FALSE) +
  ggplot2::facet_wrap(~ Sex) + 
  ggplot2::labs(x = "Settings", y = "Mean of Subjective Health") + 
  ggplot2::theme_bw(base_size = 18) + 
  ggplot2::scale_shape_manual(values = c(4, 1)) + 
  ggplot2::theme(legend.position = "top") + 
  ggplot2::scale_y_continuous(limits = c(3.0, 3.6))

# _____________----

# Own Education ----

## % of children's education ----

Prob_childeducation <- ssm_filtered |> 
  dplyr::select(Education, ChildCollege_Max, Sex) |> 
  dplyr::summarise(ChildCollege_Max = round(mean(ChildCollege_Max * 100), 1), 
                   .by = c(Education, Sex))

Prob_childeducation <- ssm_filtered |> 
  dplyr::select(Education, ChildCollege_Max) |> 
  dplyr::group_by(Education) |> 
  dplyr::summarise(ChildCollege_Max = round(mean(ChildCollege_Max * 100), 1)) |> 
  dplyr::mutate(Sex = "Overall") |> 
  dplyr::bind_rows(Prob_childeducation)

Prob_childeducation |>
  dplyr::mutate(Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female")) |> 
  ggplot2::ggplot(ggplot2::aes(x = Education, y = ChildCollege_Max)) +
  ggplot2::geom_col() + 
  ggplot2::geom_text(aes(label = ChildCollege_Max), size = 10, color = "white", vjust = 3) + 
  ggplot2::facet_wrap(~ Sex) +
  ggplot2::labs(x = "Respondents' Education", y = "% of University Graduation of Children") +
  guides(x = guide_axis(angle = 90)) + 
  ggplot2::theme_bw(base_size = 22)


## Esimate overall --------------------------------------------------------------

assignment_overall <- ssm_filtered |> dplyr::filter(Education == "University or more") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_overall <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + Education),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + Education + ChildCollege_Max),
    category_name = "Education",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

## Extract tibble of factural and counterfactual estimates----------------------
Result <- estimate_overall$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result <- estimate_overall$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result)

Result <- Result |> dplyr::mutate(Sex = "Overall")

close_value_oa <- estimate_overall$change_disparities |> dplyr::filter(Education == "University or more - Less than university" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_oa <- estimate_overall$change_disparities |> dplyr::filter(Education == "University or more - Less than university" & change_type == "proportional") |> select(estimate) |> unlist()

# Esimate male --------------------------------------------------------------

assignment_male <- ssm_filtered |> dplyr::filter(Sex == "Male" & Education == "University or more") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_male <- ssm_filtered |> 
  dplyr::filter(Sex == "Male") |> 
  gapclosing(
    counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + Education),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + Education + ChildCollege_Max),
    category_name = "Education",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

## Extract tibble of factural and counterfactual estimates----------------------
Result_m <- estimate_male$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_m <- estimate_male$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_m)

Result_m <- Result_m |> dplyr::mutate(Sex = "Male")

close_value_m <- estimate_male$change_disparities |> dplyr::filter(Education == "University or more - Less than university" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_m <- estimate_male$change_disparities |> dplyr::filter(Education == "University or more - Less than university" & change_type == "proportional") |> select(estimate) |> unlist()


# Esimate female --------------------------------------------------------------

assignment_female <- ssm_filtered |> dplyr::filter(Sex == "Female" & Education == "University or more") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

estimate_female <- ssm_filtered |> 
  dplyr::filter(Sex == "Female") |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + Education),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education * Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + Education + ChildCollege_Max),
    category_name = "Education",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

## Extract tibble of factural and counterfactual estimates----------------------
Result_f <- estimate_female$factual_means |> 
  dplyr::mutate(Estimates = "Factural")

Result_f <- estimate_female$counterfactual_means |> 
  dplyr::mutate(Estimates = "Counterfactual") |> 
  dplyr::bind_rows(Result_f)

Result_f <- Result_f |> dplyr::mutate(Sex = "Female")

close_value_f <- estimate_female$change_disparities |> dplyr::filter(Education == "University or more - Less than university" & change_type == "additive") |> select(estimate) |> unlist()

close_prop_f <- estimate_female$change_disparities |> dplyr::filter(Education == "University or more - Less than university" & change_type == "proportional") |> select(estimate) |> unlist()


# Integrated Results -------------------------------------------------------

Result <- Result |> dplyr::bind_rows(Result_m) |> dplyr::bind_rows(Result_f)

Result <- Result |> dplyr::mutate(close_prop = dplyr::case_when(Sex == "Overall" ~ close_prop_oa, 
                                                                Sex == "Male" ~ close_prop_m, 
                                                                Sex == "Female" ~ close_prop_f), 
                                  close_value = dplyr::case_when(Sex == "Overall" ~ close_value_oa, 
                                                                 Sex == "Male" ~ close_value_m, 
                                                                 Sex == "Female" ~ close_value_f))

Result <- Result |> 
  dplyr::filter(Estimates == "Factural") |> 
  dplyr::group_by(Sex, Education) |> 
  dplyr::summarise(beginning = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "Education"))

Result <- Result |> 
  dplyr::filter(Estimates == "Counterfactual") |> 
  dplyr::group_by(Sex, Education) |> 
  dplyr::summarise(end = mean(estimate)) |> 
  ungroup() |> 
  dplyr::inner_join(Result, by = c("Sex", "Education"))

# Visualize the results -------------------------------------------------------

Result |> 
  dplyr::mutate(Estimates = forcats::fct_relevel(Estimates, "Factural", "Counterfactual"), 
                Sex = forcats::fct_relevel(Sex, "Overall", "Male", "Female"), 
                Education = forcats::fct_relevel(Education, "University or more", "Less than university")) |> 
  dplyr::rename(`Individuals' education` = Education) |> 
  dplyr::group_by(Estimates) |> 
  dplyr::ungroup() |> 
  ggplot2::ggplot(aes(x = Estimates, y = estimate, ymin = ci.min, ymax = ci.max)) + 
  ggplot2::geom_point(aes(shape = `Individuals' education`), position = ggplot2::position_dodge(width = .1), size = 2.5) + 
  ggplot2::geom_errorbar(aes(shape = `Individuals' education`), position = ggplot2::position_dodge(width = .1), width = 0.1) + 
  ggplot2::geom_segment(aes(x = c(1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05, 1.95, 1.05), xend = 1.5,
                            y = estimate, yend = estimate)) +
  ggplot2::geom_segment(aes(x = 1.5, xend = 1.5,
                            y = beginning, yend = end), 
                        linetype = "dashed", 
                        linewidth = .5) +
  ggplot2::geom_label(aes(x = 1.5, label = paste0("absolute: ", round(close_value, 2), "pt", "\n", "proportional: ", round((close_prop * 100), 1), "%", "\n", "gap closed"),
                          y = 2.95),
                      position = ggplot2::position_dodge(width = .1),
                      size = 5,
                      show.legend = FALSE) +
  ggplot2::facet_wrap(~ Sex) + 
  ggplot2::labs(x = "Settings", y = "Mean of Subjective Health") + 
  ggplot2::theme_bw(base_size = 18) + 
  ggplot2::scale_shape_manual(values = c(4, 1)) + 
  ggplot2::theme(legend.position = "top") + 
  ggplot2::scale_y_continuous(limits = c(2.9, 3.7))

# _____________----

# Function-wide estimator ----

# Those who have children who is over 25 years old

set.seed(777)

ssm_filtered <- ssm |> 
  dplyr::filter(Age >= 50)

ssm_filtered <- ssm_filtered |> 
  dplyr::filter(ChildNum >= 1 & Child1_25 == 1)

ssm_filtered <- ssm_filtered |> 
  dplyr::select(id, SubjHealth, ParentHighEdu, ChildCollege_Max, Sex, BirthCohort, Age, Education, Place_15, Jsei_Fj, EmpStatus_Fj, Unemp, MarStatus, PastHealthIssue)

ssm_filtered <- ssm_filtered |> 
  tidyr::drop_na()


assignment_overall <- ssm_filtered |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

## Treatment Model ----

### GLM ----

est_tr_glm <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_glm <- est_tr_glm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "NULL")


### Ridge ----

est_tr_ridge <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_ridge <- est_tr_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "NULL")

### GAM ----

est_tr_gam <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_gam <- est_tr_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "NULL")


### Random Forests ----

est_tr_ranger <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ranger",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_ranger <- est_tr_ranger$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Random_Forest", 
                Outcome = "NULL")

## Outcome Model ----

### GLM ----

est_out_lm <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "lm",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_lm <- est_out_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "OLS")

### Ridge ----

est_out_ridge <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "ridge",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_ridge <- est_out_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "Ridge")

### GAM ----

est_out_gam <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "gam",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_gam <- est_out_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "GAM")


## Doubly robust ----

### GLM ----

#### GLM/LM ----

est_dr_glm_lm <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_lm <- est_dr_glm_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "OLS")

#### GLM/Ridge ----

est_dr_glm_ridge <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_ridge <- est_dr_glm_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "Ridge")

#### GLM/GAM ----

est_dr_glm_gam <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_gam <- est_dr_glm_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "GAM")


### Ridge ----

#### Ridge/LM----

est_dr_ridge_lm <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_lm <- est_dr_ridge_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "OLS")

#### Ridge/Ridge----

est_dr_ridge_ridge <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_ridge <- est_dr_ridge_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "Ridge")

#### Ridge/GAM----

est_dr_ridge_gam <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_gam <- est_dr_ridge_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "GAM")


### GAM ----

#### GAM/LM ----

est_dr_gam_lm <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_lm <- est_dr_gam_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "OLS")

#### GAM/Ridge ----

est_dr_gam_ridge <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_ridge <- est_dr_gam_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "Ridge")

#### GAM/GAM ----

est_dr_gam_gam <- ssm_filtered |> 
  gapclosing(
    counterfactual_assignments = assignment_overall,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Sex + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_gam <- est_dr_gam_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "GAM")

# Combine all estimator ----

function_multiverse <- dplyr::bind_rows(list(est_tr_glm, est_tr_ridge, est_tr_gam, est_out_lm, est_out_ridge, est_out_gam, est_dr_glm_lm, est_dr_glm_ridge, est_dr_glm_gam, est_dr_ridge_lm, est_dr_ridge_ridge, est_dr_ridge_gam, est_dr_gam_lm, est_dr_gam_ridge, est_dr_gam_gam))

function_multiverse <- function_multiverse |> 
  tidyr::pivot_wider(names_from = "change_type", 
                     values_from = "estimate") |> 
  dplyr::mutate(proportional = round(proportional * 100, 1), 
                Treatment = forcats::fct_relevel(Treatment, c("NULL", "GLM", "Ridge", "GAM")), 
                Outcome = forcats::fct_relevel(Outcome, c("NULL", "OLS", "Ridge", "GAM"))) |> 
  ggplot2::ggplot(ggplot2::aes(x = Treatment, y = Outcome, fill = proportional, label = paste0(proportional, " %", "\n", round(additive, 2), " pt"))) +
  ggplot2::geom_tile() +
  ggplot2::scale_fill_gradient(low = "white", high = "steelblue") +
  ggplot2::geom_text(size = 10, color = "grey20") + 
  ggplot2::theme_minimal(base_size = 16) + 
  ggplot2::theme(legend.position="none")

ggsave("../fig/multiverse_function.png", function_multiverse, width = 10, height = 10)

# MALE | Function-wide estimator ----

ssm_filtered_male <- ssm_filtered |> dplyr::filter(Sex == "Male")

assignment_male <- ssm_filtered_male |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()


## Treatment Model ----

### GLM ----

est_tr_glm <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_glm <- est_tr_glm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "NULL")


### Ridge ----

est_tr_ridge <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_ridge <- est_tr_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "NULL")

### GAM ----

est_tr_gam <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_gam <- est_tr_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "NULL")


### Random Forests ----

est_tr_ranger <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ranger",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_ranger <- est_tr_ranger$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Random_Forest", 
                Outcome = "NULL")

## Outcome Model ----

### GLM ----

est_out_lm <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "lm",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_lm <- est_out_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "OLS")

### Ridge ----

est_out_ridge <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "ridge",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_ridge <- est_out_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "Ridge")

### GAM ----

est_out_gam <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "gam",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_gam <- est_out_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "GAM")


## Doubly robust ----

### GLM ----

#### GLM/LM ----

est_dr_glm_lm <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_lm <- est_dr_glm_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "OLS")

#### GLM/Ridge ----

est_dr_glm_ridge <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_ridge <- est_dr_glm_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "Ridge")

#### GLM/GAM ----

est_dr_glm_gam <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_gam <- est_dr_glm_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "GAM")


### Ridge ----

#### Ridge/LM----

est_dr_ridge_lm <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_lm <- est_dr_ridge_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "OLS")

#### Ridge/Ridge----

est_dr_ridge_ridge <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_ridge <- est_dr_ridge_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "Ridge")

#### Ridge/GAM----

est_dr_ridge_gam <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_gam <- est_dr_ridge_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "GAM")


### GAM ----

#### GAM/LM ----

est_dr_gam_lm <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_lm <- est_dr_gam_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "OLS")

#### GAM/Ridge ----

est_dr_gam_ridge <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_ridge <- est_dr_gam_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "Ridge")

#### GAM/GAM ----

est_dr_gam_gam <- ssm_filtered_male |> 
  gapclosing(
counterfactual_assignments = assignment_male,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_gam <- est_dr_gam_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "GAM")

# Combine all estimator ----

function_multiverse <- dplyr::bind_rows(list(est_tr_glm, est_tr_ridge, est_tr_gam, est_out_lm, est_out_ridge, est_out_gam, est_dr_glm_lm, est_dr_glm_ridge, est_dr_glm_gam, est_dr_ridge_lm, est_dr_ridge_ridge, est_dr_ridge_gam, est_dr_gam_lm, est_dr_gam_ridge, est_dr_gam_gam))

function_multiverse <- function_multiverse |> 
  tidyr::pivot_wider(names_from = "change_type", 
                     values_from = "estimate") |> 
  dplyr::mutate(proportional = round(proportional * 100, 1), 
                Treatment = forcats::fct_relevel(Treatment, c("NULL", "GLM", "Ridge", "GAM")), 
                Outcome = forcats::fct_relevel(Outcome, c("NULL", "OLS", "Ridge", "GAM"))) |> 
  ggplot2::ggplot(ggplot2::aes(x = Treatment, y = Outcome, fill = proportional, label = paste0(proportional, " %", "\n", round(additive, 2), " pt"))) +
  ggplot2::geom_tile() +
  ggplot2::scale_fill_gradient(low = "white", high = "steelblue") +
  ggplot2::geom_text(size = 10, color = "grey20") + 
  ggplot2::theme_minimal(base_size = 16) + 
  ggplot2::theme(legend.position="none")

ggsave("../fig/multiverse_function_male.png", function_multiverse, width = 10, height = 10)

# FEMALE | Function-wide estimator ----

ssm_filtered_female <- ssm_filtered |> dplyr::filter(Sex == "Female")

assignment_female <- ssm_filtered_female |> dplyr::filter(ParentHighEdu == "High") |> dplyr::summarise(mean(ChildCollege_Max)) |> unlist()

## Treatment Model ----

### GLM ----

est_tr_glm <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_glm <- est_tr_glm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "NULL")


### Ridge ----

est_tr_ridge <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_ridge <- est_tr_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "NULL")

### GAM ----

est_tr_gam <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_gam <- est_tr_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "NULL")


### Random Forests ----

est_tr_ranger <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ranger",
    outcome_name = "SubjHealth",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_tr_ranger <- est_tr_ranger$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Random_Forest", 
                Outcome = "NULL")

## Outcome Model ----

### GLM ----

est_out_lm <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "lm",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_lm <- est_out_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "OLS")

### Ridge ----

est_out_ridge <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "ridge",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_ridge <- est_out_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "Ridge")

### GAM ----

est_out_gam <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    outcome_algorithm = "gam",
    treatment_name = "ChildCollege_Max",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_out_gam <- est_out_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "NULL", 
                Outcome = "GAM")


## Doubly robust ----

### GLM ----

#### GLM/LM ----

est_dr_glm_lm <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_lm <- est_dr_glm_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "OLS")

#### GLM/Ridge ----

est_dr_glm_ridge <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_ridge <- est_dr_glm_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "Ridge")

#### GLM/GAM ----

est_dr_glm_gam <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "glm",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_glm_gam <- est_dr_glm_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GLM", 
                Outcome = "GAM")


### Ridge ----

#### Ridge/LM----

est_dr_ridge_lm <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_lm <- est_dr_ridge_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "OLS")

#### Ridge/Ridge----

est_dr_ridge_ridge <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_ridge <- est_dr_ridge_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "Ridge")

#### Ridge/GAM----

est_dr_ridge_gam <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "ridge",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_ridge_gam <- est_dr_ridge_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "Ridge", 
                Outcome = "GAM")


### GAM ----

#### GAM/LM ----

est_dr_gam_lm <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "lm",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_lm <- est_dr_gam_lm$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "OLS")

#### GAM/Ridge ----

est_dr_gam_ridge <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "ridge",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_ridge <- est_dr_gam_ridge$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "Ridge")

#### GAM/GAM ----

est_dr_gam_gam <- ssm_filtered_female |> 
  gapclosing(
    counterfactual_assignments = assignment_female,
    treatment_formula = formula(ChildCollege_Max ~ BirthCohort + Age + Education + Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu),
    outcome_formula = formula(SubjHealth ~ BirthCohort + Age + Education * Place_15 + Jsei_Fj + EmpStatus_Fj + Unemp + MarStatus + PastHealthIssue + ParentHighEdu + ChildCollege_Max),
    category_name = "ParentHighEdu",
    treatment_algorithm = "gam",
    outcome_algorithm = "gam",
    sample_split = "single_sample",
    se = TRUE,
    bootstrap_samples = 1000,
    parallel_cores = 10
  )

est_dr_gam_gam <- est_dr_gam_gam$change_disparities |> 
  dplyr::filter(ParentHighEdu == "High - Low") |> 
  dplyr::select(estimate, change_type) |> 
  dplyr::mutate(Treatment = "GAM", 
                Outcome = "GAM")

# Combine all estimator ----

function_multiverse <- dplyr::bind_rows(list(est_tr_glm, est_tr_ridge, est_tr_gam, est_out_lm, est_out_ridge, est_out_gam, est_dr_glm_lm, est_dr_glm_ridge, est_dr_glm_gam, est_dr_ridge_lm, est_dr_ridge_ridge, est_dr_ridge_gam, est_dr_gam_lm, est_dr_gam_ridge, est_dr_gam_gam))

function_multiverse <- function_multiverse |> 
  tidyr::pivot_wider(names_from = "change_type", 
                     values_from = "estimate") |> 
  dplyr::mutate(proportional = round(proportional * 100, 1), 
                Treatment = forcats::fct_relevel(Treatment, c("NULL", "GLM", "Ridge", "GAM")), 
                Outcome = forcats::fct_relevel(Outcome, c("NULL", "OLS", "Ridge", "GAM"))) |> 
  ggplot2::ggplot(ggplot2::aes(x = Treatment, y = Outcome, fill = proportional, label = paste0(proportional, " %", "\n", round(additive, 2), " pt"))) +
  ggplot2::geom_tile() +
  ggplot2::scale_fill_gradient(low = "white", high = "steelblue") +
  ggplot2::geom_text(size = 10, color = "grey20") + 
  ggplot2::theme_minimal(base_size = 16) + 
  ggplot2::theme(legend.position="none")

ggsave("../fig/functionalmultiverse_female.png", function_multiverse, width = 10, height = 10)
